Loading packages for the plots

library(ggplot2)
library(plotly)
library(flexdashboard)
library(dplyr)
library(tidyverse)
library(leaflet)
library(knitr)

Reading in Cleaned Data

# Read the 2007 PRAM CSV files used by the cleaning steps below.
alcohol_data_2007 <- read_csv("./data/PRAM_2007_alcohol.csv")
tobacco_data_2007 <- read_csv("./data/PRAM_2007_tobacco.csv")
no_contraception_data_2007 <- read_csv(
  "./data/PRAM_2007_no_contraception.csv"
)
infant_mortality_df <- read_csv("./data/PRAM_2007_infantmortality.csv")
maternal_race <- read_csv("./data/PRAM_2007_Maternal_Race.csv")

# Cleaned alcohol data: drop the "DRINKER WHO QUIT", "NONDRINKER" and "NO"
# responses and rows missing a response or geolocation, then split
# geolocation into numeric latitude/longitude columns.
# NOTE(review): geolocation is presumably formatted as "(lat, long)" - confirm.
cleaned_alc_2007 <- alcohol_data_2007 |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, data_value_type)) |>
  drop_na(response, geolocation) |>
  filter(!response %in% c("DRINKER WHO QUIT", "NONDRINKER", "NO")) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    # Strip the surrounding parentheses before converting to numeric.
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_remove_all(coord, "\\(|\\)"))
    )
  )

# Cleaned tobacco data: drop the "SMOKER WHO QUIT", "NONSMOKER",
# "None (0 cig)" and "NO" responses and rows missing a response or
# geolocation, then split geolocation into numeric latitude/longitude columns.
cleaned_tobac_2007 <- tobacco_data_2007 |>
  janitor::clean_names() |>
  select(-data_value_type) |>
  drop_na(response, geolocation) |>
  filter(
    !response %in% c("SMOKER WHO QUIT", "NONSMOKER", "None (0 cig)", "NO")
  ) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    # Strip the surrounding parentheses before converting to numeric.
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_remove_all(coord, "\\(|\\)"))
    )
  )

# Cleaned maternal race data: drop rows missing a response or geolocation and
# split geolocation into numeric latitude/longitude columns.
cleaned_mat_race <- maternal_race |>
  janitor::clean_names() |>
  select(-c(data_value_std_err, data_value_type)) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    # Strip the surrounding parentheses before converting to numeric.
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_remove_all(coord, "\\(|\\)"))
    )
  )
  
# Read the remaining 2007 PRAM CSV files (no alcohol, no tobacco,
# contraception).
no_alcohol_data_2007 <- read_csv("./data/PRAM_2007_no_alcohol.csv")
no_tobacco_data_2007 <- read_csv("./data/PRAM_2007_no_tobacco.csv")
contraception_data_2007 <- read_csv("./data/PRAM_2007_contraception.csv")

# Cleaned no-alcohol data: drop rows without a response and remove the
# standard-error, geolocation and value-type columns.
cleaned_no_alc_2007 <- no_alcohol_data_2007 |>
  janitor::clean_names() |>
  drop_na(response) |>
  select(-c(data_value_std_err, geolocation, data_value_type))

# NOTE(review): interactive inspection call - confirm it is still needed.
view(cleaned_no_alc_2007)

# Cleaned no-tobacco data: drop rows without a response and remove the
# standard-error, geolocation and value-type columns.
cleaned_no_tobacco_2007 <- no_tobacco_data_2007 |>
  janitor::clean_names() |>
  drop_na(response) |>
  select(-c(data_value_std_err, geolocation, data_value_type))

# Cleaned infant mortality data: remove the standard-error, value-type, unit
# and footnote columns, drop rows missing a response or geolocation, and
# split geolocation into numeric latitude/longitude columns.
cleaned_infant_mortality <- infant_mortality_df |>
  janitor::clean_names() |>
  select(
    -c(
      data_value_std_err,
      data_value_type,
      data_value_unit,
      data_value_footnote_symbol,
      data_value_footnote
    )
  ) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    # Strip the surrounding parentheses before converting to numeric.
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_remove_all(coord, "\\(|\\)"))
    )
  )

# Cleaned contraception data: drop rows without a response, exclude the
# "YES (CHECKED)" and "YES" responses, and remove the standard-error,
# geolocation and value-type columns.
cleaned_contraception_2007 <- contraception_data_2007 |>
  janitor::clean_names() |>
  drop_na(response) |>
  filter(!response %in% c("YES (CHECKED)", "YES")) |>
  select(-c(data_value_std_err, geolocation, data_value_type))

# Cleaned no-contraception data: drop rows without a response and split
# geolocation into numeric latitude/longitude columns.
# NOTE(review): rows with a missing geolocation are kept here, unlike the
# other map-ready tables - confirm this is intended.
cleaned_no_contra_2007 <- no_contraception_data_2007 |>
  janitor::clean_names() |>
  select(-data_value_type) |>
  drop_na(response) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    # Strip the surrounding parentheses before converting to numeric.
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_remove_all(coord, "\\(|\\)"))
    )
  )

Plot 1: Alcohol Consumption in relation to Infant Mortality

# Dodged bar chart of the number of rows per alcohol question, split by
# response. geom_bar() counts rows, so the y axis is labelled as a count.
cleaned_alc_2007 |>
  ggplot(aes(x = question, fill = response)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Questions vs Response of Alcohol Consumption",
    x = "Question",
    y = "Count",
    fill = "Response"
  ) +
  theme_minimal() +
  # Rotate the question text so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# creating "yes" variable 


# Overlay the question/response pairs from the alcohol and infant mortality
# tables. Colour is mapped to the source table so the legend identifies
# which points come from which data set.
ggplot() +
  geom_point(
    data = cleaned_alc_2007,
    aes(x = question, y = response, color = "Alcohol consumption"),
    size = 3
  ) +
  geom_point(
    data = cleaned_infant_mortality,
    aes(x = question, y = response, color = "Infant mortality"),
    size = 3
  ) +
  scale_color_manual(
    name = "Data set",
    values = c("Alcohol consumption" = "blue", "Infant mortality" = "red")
  ) +
  labs(
    title = "Alcohol Consumption and Infant Mortality: Responses by Question",
    x = "Question",
    y = "Response"
  ) +
  theme_minimal()

Plot 2: Tobacco Consumption in relation to Infant Mortality

Plot 3: No Consumption in relation to Infant Mortality

Map of Maternal Alcohol Use

# Interactive map with one circle marker per row of the cleaned alcohol data.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = cleaned_alc_2007,
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr,  # hover label: location abbreviation
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response)  # click popup: survey response
  )

Map of Maternal Tobacco use

# Interactive map with one circle marker per row of the cleaned tobacco data.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = cleaned_tobac_2007,
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr,  # hover label: location abbreviation
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response)  # click popup: survey response
  )

Map of Infant Mortality Rate

# Interactive map with one circle marker per row of the cleaned infant
# mortality data.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = cleaned_infant_mortality,
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr,  # hover label: location abbreviation
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response)  # click popup: survey response
  )

States with the most infant mortality rates

The map above shows the locations across the US for which infant mortality data were recorded.

# Count, per location, the rows where the "infant currently alive" indicator
# is "NO", and format the result as a knitr table.
# NOTE(review): this counts matching rows rather than summing a death count -
# confirm that is the intended measure.
infant_deaths <- cleaned_infant_mortality |>
  filter(
    question == "Indicator of infant currently alive",
    response == "NO"
  ) |>
  count(location_desc, name = "total_infant_deaths") |>
  kable()

print(infant_deaths)
## 
## 
## |location_desc            | total_infant_deaths|
## |:------------------------|-------------------:|
## |Alaska                   |                  45|
## |Arkansas                 |                  45|
## |Colorado                 |                  47|
## |Delaware                 |                  40|
## |Georgia                  |                  43|
## |Hawaii                   |                  45|
## |Illinois                 |                  47|
## |Maine                    |                  42|
## |Maryland                 |                  45|
## |Massachusetts            |                  44|
## |Michigan                 |                  43|
## |Minnesota                |                  41|
## |Missouri                 |                  42|
## |Nebraska                 |                  45|
## |New Jersey               |                  39|
## |New York (excluding NYC) |                  47|
## |New York City            |                  47|
## |North Carolina           |                  47|
## |Ohio                     |                  46|
## |Oklahoma                 |                  47|
## |Oregon                   |                  46|
## |Pennsylvania             |                   3|
## |Rhode Island             |                  46|
## |South Carolina           |                  47|
## |South Dakota             |                  43|
## |Utah                     |                  47|
## |Vermont                  |                  47|
## |Washington               |                  43|
## |West Virginia            |                  47|
## |Wisconsin                |                  40|
## |Wyoming                  |                  43|

The table summarizes total infant deaths by location, with one row per state or reporting area. The location_desc column gives the location, and the total_infant_deaths column gives the corresponding count of infant deaths. The counts vary across locations: Pennsylvania has a notably lower count (3), while others, such as Alaska and Arkansas (45 each), have higher counts. Apart from Pennsylvania, all counts fall within the 35 to 50 range. This summary provides an overview of the distribution of infant deaths across geographical locations.

# Keep the "infant currently alive = NO" rows broken out by maternal
# race/ethnicity for the three ethnicity groups compared below.
filtered_mortality_race <- cleaned_infant_mortality |>
  filter(
    break_out_category == "Maternal Race/Ethnicity",
    break_out %in% c("Hispanic", "Non-hispanic", "White, non-Hispanic"),
    question == "Indicator of infant currently alive",
    response == "NO"
  )

print(filtered_mortality_race)
## # A tibble: 53 × 23
##     year location_abbr location_desc class   topic question data_source response
##    <dbl> <chr>         <chr>         <chr>   <chr> <chr>    <chr>       <chr>   
##  1  2007 UT            Utah          Infant… Preg… Indicat… PRAMS       NO      
##  2  2007 OR            Oregon        Infant… Preg… Indicat… PRAMS       NO      
##  3  2007 WA            Washington    Infant… Preg… Indicat… PRAMS       NO      
##  4  2007 YC            New York City Infant… Preg… Indicat… PRAMS       NO      
##  5  2007 OH            Ohio          Infant… Preg… Indicat… PRAMS       NO      
##  6  2007 ME            Maine         Infant… Preg… Indicat… PRAMS       NO      
##  7  2007 MD            Maryland      Infant… Preg… Indicat… PRAMS       NO      
##  8  2007 ME            Maine         Infant… Preg… Indicat… PRAMS       NO      
##  9  2007 MA            Massachusetts Infant… Preg… Indicat… PRAMS       NO      
## 10  2007 IL            Illinois      Infant… Preg… Indicat… PRAMS       NO      
## # ℹ 43 more rows
## # ℹ 15 more variables: data_value <dbl>, low_confidence_limit <dbl>,
## #   high_confidence_limit <dbl>, sample_size <dbl>, break_out <chr>,
## #   break_out_category <chr>, latitude <dbl>, longitude <dbl>, class_id <chr>,
## #   topic_id <chr>, question_id <chr>, location_id <dbl>, break_out_id <chr>,
## #   break_out_categoryid <chr>, response_id <chr>
# NOTE(review): interactive inspection call - confirm it is still needed.
view(filtered_mortality_race)

# Bar chart of the number of filtered infant mortality rows per ethnicity
# group, with a fixed colour for each group.
plot_infant_deaths <- ggplot(
  filtered_mortality_race,
  aes(x = break_out, fill = break_out)
) +
  geom_bar() +
  labs(
    title = "Infant Deaths by Ethnicity",
    x = "Ethnicity",
    y = "Total Infant Deaths"
  ) +
  scale_fill_manual(
    values = c(
      "Hispanic" = "red",
      "Non-hispanic" = "blue",
      "White, non-Hispanic" = "green"
    )
  ) +
  theme_minimal()

# Assigning the plot does not draw it; print it so it appears in the output.
plot_infant_deaths

The plot above (plot_infant_deaths) shows infant deaths categorized by whether the mother was Hispanic or not. The graph shows that those who were not Hispanic had a higher infant death count than those who were Hispanic.